Load Data In

First, let’s load in our processed data from lab 1.

# Load the processed elk telemetry data from lab 1 (creates `elk_processed`).
load("./data/elk_processed.rda")

# Quick sanity check: datetime, lon/lat (WGS84), and animal id columns.
head(elk_processed)
##              datetime       lon      lat   id
## 1 2001-12-13 07:01:00 -115.8043 52.12410 4049
## 2 2001-12-13 09:01:00 -115.8003 52.11762 4049
## 3 2001-12-14 09:01:00 -115.8281 52.09611 4049
## 4 2001-12-14 11:00:00 -115.8318 52.09829 4049
## 5 2001-12-14 17:02:00 -115.8042 52.09482 4049
## 6 2001-12-14 19:01:00 -115.8037 52.12493 4049

Select Individuals (Non-Residential)

We can use one of the visualization methods we learned to examine the tracks for non-residential (migratory) behavior.

library(ggplot2)
# Plot each individual's lon/lat track in its own facet so migratory
# "blob - dispersal line - blob" patterns are easy to spot.
# Fix: spell out `scales` instead of relying on partial matching of `scale`.
ggplot(data = elk_processed, aes(x = lon, y = lat)) +
  geom_path(size = 0.5) +
  theme_classic() +
  facet_wrap(~id, scales = "free", ncol = 3)

We will use the dplyr package and “%in%” condition to select some of the individuals with clear distinctions between residential and transiting behavior (movement “blob” followed by a straight dispersal line and then a second movement “blob”).

library(dplyr)
# Subset to the six individuals whose tracks show clear migratory
# (non-residential) behavior.
elk_mig <- filter(
  elk_processed,
  id %in% c("GP2", "YL25", "YL29", "YL73", "YL77", "YL78")
)

We can visualize our selected tracks, looking at the change in latitude over time. Each track has a clear residential period in latitude, followed by a drop in latitude, a second residential period, and for most, a second spike in latitude as they disperse back to the first area of residency.

# Latitude through time for each selected individual; free scales per facet.
# Fix: spell out `scales` instead of relying on partial matching of `scale`.
ggplot(data = elk_mig, aes(x = datetime, y = lat)) +
  geom_path(size = 0.5) +
  theme_classic() +
  facet_wrap(~id, scales = "free", ncol = 2)

Behavioral Change Point Analysis

The “smoove” package allows for multiple continuous-time movement models (correlated velocity models or CVMs) to be fit to irregular tracking data to identify “modes” of behavior.

Each model comes with its own set of parameters, and the "best" fitting models are determined using AIC/BIC and maximum likelihood.

CVM model options include: Uncorrelated CVM (UCVM), Advective CVM (AVCM), Rotational CVM (RCVM), and Rotational-Advective CVM (RACVM).

A single-change point can also be identified assuming just a UCVM process and sudden changes in the UCVM parameters, time scale, and root mean square speeds.

Check out the "smoove" package vignette for more examples and details.

library(devtools)

# Install smoove from GitHub only when it is missing, so the script does not
# re-download and rebuild the package on every run.
if (!requireNamespace("smoove", quietly = TRUE)) {
  install_github("EliGurarie/smoove")
}
library(smoove)
library(sf)

First, we need to use the methods we learned in lab 1 to convert our data to an sf object and then convert the coordinates to a projected coordinate system. We then extract and store the coordinates and save them as X and Y variables in our data.

# Project the WGS84 lon/lat coordinates to UTM zone 11N (EPSG:32611).
elk_utm <- st_transform(
  st_as_sf(elk_mig, coords = c("lon", "lat"), crs = 4326),
  32611
)

# Store the projected coordinates back on the flat data frame, plus the
# complex-number representation Z = X + iY used by smoove.
coords <- st_coordinates(elk_utm)
elk_mig$X <- coords[, 1]
elk_mig$Y <- coords[, 2]
elk_mig$Z <- complex(real = elk_mig$X, imaginary = elk_mig$Y)

We will select one of our “migratory” individuals as an example for the analyses, “YL73”.

# Pull out a single migratory individual ("YL73") with only the columns
# needed for the changepoint analysis.
elk_example <- elk_mig |>
  dplyr::filter(id == "YL73") |>
  dplyr::select(id, datetime, X, Y, Z)

We can use the scan_track function from the "smoove" package to plot this individual's track in multiple dimensions:

# Multi-panel plot of YL73's track (x-y plus coordinates over time) via smoove.
with(elk_example, scan_track(x = X, y = Y, main = "YL73"))

We can now apply the sweepRACVM function to:

1 - set a “window” size to move along the track and detect changes in behavior (size should reflect temporal scale of behavior of interest)

2 - using the set window and the “windowstep” as the step size, “sweep” along the track, and determine the likelihoods of various changepoints.

We will try a window size of ~ 200 days and windowstep of ~ 30 days.

Note: this can take a long time to run. The parallel example may be a better option, especially if your computer has a decent number of cores.

# Sweep a 200-day window in 30-day steps along the track, fitting RACVM
# models to score candidate behavioral changepoints (serial version; slow).
elk_sweep <- with(elk_example, sweepRACVM(Z = Z, T = datetime, windowsize = 200, time.unit = "days", windowstep = 30, model = "RACVM", progress = TRUE))

To use the parallel example, you first have to install and load the “doParallel” package.

library(doParallel)

You then need to use the makeCluster and detectCores functions to define the clusters for parallel processing.

# Build a parallel cluster. detectCores() can return NA on some systems, and
# using every core leaves none for the OS, so reserve one core (minimum 1).
n_cores <- max(1L, parallel::detectCores() - 1L, na.rm = TRUE)
cl <- parallel::makeCluster(n_cores)

doParallel::registerDoParallel(cl)
# Remember to release the workers when finished: parallel::stopCluster(cl)

The same function can now be run, using the “.parallel = TRUE” argument instead.

# Parallel window sweep. Fix: the time column in elk_example is `datetime`;
# there is no `date` column, so `T = date` would pick up base R's date()
# function inside with() instead of the track's timestamps.
elk_sweep <- with(elk_example, sweepRACVM(Z = Z, T = datetime, windowsize = 200, time.unit = "days", windowstep = 30, model = "RACVM", progress = TRUE, .parallel = TRUE))

The resulting object can be plotted with the plotWindowSweep function to visualize the relative log-likelihood profile for a single window.

Here, you are looking for distinct peaks in the log-likelihood, which indicate significant changepoints.

# Relative log-likelihood profile across the sweep; distinct peaks mark
# candidate changepoints.
plotWindowSweep(elk_sweep, main = "YL73")

Now that the likelihood profiles for candidate change points have been determined, the findCandidateChangePoints function can be used to identify the distinct changepoints in the data based on these likelihood profiles.

The “clusterwidth” argument can be used to group changepoints together that may be quite similar.

You can also decide which models to use as candidates for each changepoint segment (UCVM, ACVM, RCVM, RACVM) - here, we use “all” and additionally use AIC for model selection, as BIC can be overly conservative.

Note: If your clusterwidth is too low, the function will give you a warning that some of the candidate changepoints are too close in time to each other to be considered distinct.

# Collapse candidate changepoints that fall within 6 days of each other, then
# keep those supported by AIC across the full candidate model set.
elk_cp <- findCandidateChangePoints(windowsweep = elk_sweep, clusterwidth = 6) |>
  getCPtable(modelset = "all", criterion="AIC")

Finally, the different behavioral phases corresponding to the chosen changepoints can be estimated and summarized using the estimatePhases and summarizePhases functions:

# Estimate movement-model parameters for each phase between changepoints.
elk_phases <- elk_cp |>
  estimatePhases()
##   phase               start                 end model       eta tau       rms
## 1     I 2004-02-20 00:00:00 2004-05-05 10:00:00  UCVM 10747.467   0 10747.467
## 2    II 2004-05-05 10:00:00 2004-11-02 06:00:00  UCVM  8627.125   0  8627.125
# One row per phase: start/end times, selected model, and parameter estimates.
elk_phases_summ <- summarizePhases(elk_phases)
str(elk_phases_summ)
## 'data.frame':    2 obs. of  7 variables:
##  $ phase: chr  "I" "II"
##  $ start: POSIXct, format: "2004-02-20 00:00:00" "2004-05-05 10:00:00"
##  $ end  : POSIXct, format: "2004-05-05 10:00:00" "2004-11-02 06:00:00"
##  $ model: chr  "UCVM" "UCVM"
##  $ eta  : num  10747 8627
##  $ tau  : num  0 0
##  $ rms  : num  10747 8627

We can now merge our summarized phases object, which contains the time points for each changepoint, with our original data, using the tidyr R package function fill to fill in the missing values for each phase’s data.

# Join phase summaries onto the track by matching each phase's `end` time to
# a location's `datetime` (all = TRUE keeps every location), then fill the
# phase columns in both directions so every observation is labeled.
elk_phases_data <- merge(elk_example, elk_phases_summ, all = TRUE, by.x = c("datetime"), by.y = c("end")) |> arrange(datetime) |>
  tidyr::fill(phase, start, model, eta, tau, rms, .direction = "updown")

str(elk_phases_data)
## 'data.frame':    9615 obs. of  11 variables:
##  $ datetime: POSIXct, format: "2004-02-20 00:02:00" "2004-02-20 01:01:00" ...
##  $ id      : chr  "YL73" "YL73" "YL73" "YL73" ...
##  $ X       : num  602201 600295 599949 599265 599230 ...
##  $ Y       : num  5734480 5734306 5733903 5733562 5733606 ...
##  $ Z       : cplx  6e+05+5.73e+06i 6e+05+5.73e+06i 6e+05+5.73e+06i ...
##  $ phase   : chr  "I" "I" "I" "I" ...
##  $ start   : POSIXct, format: "2004-02-20" "2004-02-20" ...
##  $ model   : chr  "UCVM" "UCVM" "UCVM" "UCVM" ...
##  $ eta     : num  10747 10747 10747 10747 10747 ...
##  $ tau     : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ rms     : num  10747 10747 10747 10747 10747 ...

Lastly, we can visualize the different models and phases over time and change in latitude:

# Track's y-coordinate through time, colored by the phase-specific model.
# NOTE(review): Y is the projected UTM northing (meters), not latitude in
# degrees — the "Latitude" axis label is a simplification; confirm intent.
ggplot(data = elk_phases_data, aes(x = datetime, y = Y, color = model))+
    geom_path(size=1)+
    xlab("Datetime")+ylab("Latitude")+
    theme_classic()

# Same plot, colored by phase instead of model.
ggplot(data = elk_phases_data, aes(x = datetime, y = Y, color = phase))+
    geom_path(size=1)+
    xlab("Datetime")+ylab("Latitude")+
    theme_classic()

Hidden Markov Model

Hidden Markov Models (HMMs) can be used for time series data to describe 2 processes, one observed (e.g., the data produced by telemetry, such as locations and movement metrics) and one “hidden”, representing the “hidden” ecological behaviors driving the observed data.

The Markov property of HMMs states that the probability of being in a particular behavior state at time t+1 depends only on the current state at time t. Once these probabilities are described, a transition probability matrix, or the probability of moving between behavior states, can be described. HMMs can have any number of hidden behavioral states, granted that they are biologically meaningful (model selection can also assist with determining the "optimal" number of states).

Importantly, HMMs are performed in discrete time and assume that observations are “conditionally independent”. These can be difficult for movement data derived from tags, which can be prone to irregularly spaced observations and errors.

Before fitting an HMM to our example elk data, we need to first regularize the locations in time.

Data Regularization (AniMotum)

A useful and very fast package for track regularization in R is the “AniMotum” R package. This package itself actually uses a fast-fitting state space model framework (similar to an HMM but taking into account locational error) to predict locations in regular time along a trajectory.

You can read more about this package with the published paper, Jonsen et al 2023, and with the AniMotum Overview Vignette.

You will need to install the package using the following code and ensuring that the latest versions of the “TMB” and “Matrix” R packages are also installed.

# Install aniMotum and its TMB/Matrix dependencies only when missing, so
# re-running the script does not reinstall packages every time.
# NOTE(review): the TMB version is pinned at 1.9.10 per the tutorial; the
# guard skips the pin if any TMB version is present — confirm that is OK.
if (!requireNamespace("aniMotum", quietly = TRUE)) {
  install.packages("aniMotum",
                   repos = c("https://cloud.r-project.org",
                             "https://ianjonsen.r-universe.dev"),
                   dependencies = TRUE)
}

if (!requireNamespace("TMB", quietly = TRUE)) {
  remotes::install_version("TMB", version = "1.9.10")
}
if (!requireNamespace("Matrix", quietly = TRUE)) {
  install.packages("Matrix")
}
library(aniMotum)

For use with the aniMotum package, your data needs to have the following exact column names, order, and structure: id, date (POSIXct), lc, lon, lat and (optional) additional locational error information (e.g., smaj, smin, eor columns that come with Argos satellite tag data).

The column for lc (location class) can include the following values: 3, 2, 1, 0, A, B, Z, G, or GL. The latter two are for GPS locations and ‘Generic Locations’, respectively.

Class Z values are assumed to have the same error variances as class B. By default, class G (GPS) locations are assumed to have error variances 10x smaller than Argos class 3 variances, but unlike Argos error variances, the GPS variances are the same for longitude and latitude.

Since our data is from GPS collars, we will use the class "G" option for the lc column:

# aniMotum requires exactly these columns, in order: id, date (POSIXct), lc,
# lon, lat. All fixes here are GPS, so location class is "G" throughout.
elk_mig2 <- elk_mig |>
  mutate(date = datetime, lc = "G") |>
  dplyr::select(id, date, lc, lon, lat)

We now need to choose a sampling rate to predict our locations to.

This should be informed by the most prominent fix rates in your data. You can check this using the following code (see also in lab 1):

# Successive time differences along a time vector, returned as numeric.
# `...` is forwarded to difftime() (e.g. units = "hours").
# Rewritten without the magrittr `%>%` for consistency with the rest of the
# script, which uses the native |> pipe.
dtime <- function(t, ...) {
  as.numeric(difftime(t[-1], t[-length(t)], ...))
}

# Per-individual time lags (hours) between consecutive fixes, plus the modal
# lag per animal (the most frequent value in the dtime table).
sample.rate <- elk_mig2 |> 
  group_by(id) |>
  arrange(date) |>
  mutate(dtime = c(0,round(dtime(date, units ="hours")))) |>
  mutate(mode_dtime = as.numeric(names(sort(table(dtime), decreasing=TRUE))[1])) |>
  data.frame() |>
  ungroup()

# Distribution of fix intervals pooled across all animals.
barplot(table(sample.rate$dtime))

A barplot shows us that a sample rate ~ 2 hours might be a good choice.

We can now fit the predictive model to our data, using the fit_ssm function and additional arguments, such as the maximum speed (“vmax”, in m/s, and above which locations will be flagged as outliers), the desired time step (“time.step”, in hours), and the model type (“rw” for random walk and “crw” for correlated random walk).

We will use a reasonable max speed cut off of 20 m/s, a random walk model (works better for large gaps), and a time step of 2 hours.

# State-space model fit per individual: flag locations implying speeds above
# 20 m/s as outliers, use a random-walk process model (handles large gaps),
# and predict locations on a regular 2-hour time step.
elk_ssm_fit <- fit_ssm(elk_mig2,
               vmax = 20,
               model = "rw",
               time.step = 2,
               control = ssm_control(verbose = 0))

We can use the summary function to look at the individual-level model results, including whether the models converged and AIC values.

# Per-individual convergence diagnostics, AICc, and parameter estimates.
summary(elk_ssm_fit)
##  Animal id Model Time n.obs n.filt n.fit n.pred n.rr converged    AICc
##        GP2    rw    2  6082      0  6082   3546    .      TRUE  4542.8
##       YL25    rw    2  6413      1  6412   4664    .      TRUE 14777.5
##       YL29    rw    2  5405      3  5402   3806    .      TRUE 10070.9
##       YL73    rw    2  9614      0  9614   3076    .      TRUE  8472.5
##       YL77    rw    2  2802      0  2802   3230    .      TRUE 13299.3
##       YL78    rw    2  3929      0  3929   2810    .      TRUE  6841.3
## 
## --------------
## GP2 
## --------------
##  Parameter Estimate Std.Err
##      rho_p   0.0622  0.0128
##    sigma_x   0.4546  0.0041
##    sigma_y    0.467  0.0042
##      rho_o  -0.5354  1.7234
##      tau_x   0.0266  0.0176
##      tau_y   0.0288  0.0247
## 
## --------------
## YL25 
## --------------
##  Parameter Estimate Std.Err
##      rho_p    0.219  0.0132
##    sigma_x   0.6061  0.0054
##    sigma_y   0.4665  0.0049
##      rho_o  -0.9189  0.0762
##      tau_x    0.159  0.0317
##      tau_y   0.4218  0.0297
## 
## --------------
## YL29 
## --------------
##  Parameter Estimate Std.Err
##      rho_p   0.1668  0.0133
##    sigma_x   0.5614  0.0054
##    sigma_y   0.5372  0.0052
##      rho_o  -0.8604  0.7478
##      tau_x   0.0531  0.0216
##      tau_y   0.0492  0.0266
## 
## --------------
## YL73 
## --------------
##  Parameter Estimate Std.Err
##      rho_p  -0.0781  0.0101
##    sigma_x   0.6058  0.0044
##    sigma_y   0.5927  0.0043
##          .        .       .
##      tau_x   0.0519  0.0178
##      tau_y   0.0542  0.0226
## 
## --------------
## YL77 
## --------------
##  Parameter Estimate Std.Err
##          .        .       .
##    sigma_x   0.5165  0.0059
##    sigma_y    0.464  0.0051
##          .        .       .
##          .        .       .
##          .        .       .
## 
## --------------
## YL78 
## --------------
##  Parameter Estimate Std.Err
##      rho_p  -0.0461  0.0161
##    sigma_x    0.513  0.0058
##    sigma_y   0.5159  0.0059
##      rho_o  -0.6763  0.7341
##      tau_x   0.0571  0.0243
##      tau_y    0.091  0.0404

We can define a function, extractPredictions, to extract the predicted lat/lon coordinates for our data.

# Extract one individual's SSM-predicted locations and return a plain data
# frame with WGS84 coordinates: columns id, date, lon, lat.
extractPredictions <- function(ssm_fit){
  # Predicted locations carry an sf geometry; rebuild the sf object and
  # transform from the fitted projection back to lon/lat (EPSG:4326).
  pred_sf <- ssm_fit$predicted |>
    data.frame() |>
    sf::st_as_sf() |>
    sf::st_transform(4326)

  lonlat <- sf::st_coordinates(pred_sf)
  pred_sf$lon <- lonlat[, 1]
  pred_sf$lat <- lonlat[, 2]

  # Drop the geometry column and keep only the tidy output columns.
  pred_sf |>
    data.frame() |>
    dplyr::select(id, date, lon, lat)
}

We can apply the function to the list of fitted models for each individual.

# One fitted SSM object per individual.
elk_ssm_list <- elk_ssm_fit$ssm

# Predicted (regularized) lon/lat tracks, one data frame per individual.
elk_regdata_list <- lapply(elk_ssm_list,
                      extractPredictions)

The predicted locations can be visualized over the original locations, using the ggplot function and storing the plots within a for-loop to print the results for each individual.

library(ggplot2)
# Split the observed data by animal so each plot pairs one individual's
# observed fixes with its SSM-predicted track.
elk_id <- split(elk_mig2, elk_mig2$id)

# Preallocate the list of plots instead of growing an empty vector with
# `c()`, and use seq_along() rather than 1:length() (safe for empty input).
track_plots <- vector("list", length(elk_id))
for (i in seq_along(elk_id)) {
  track_plots[[i]] <- ggplot() +
    # Predicted (regularized) track in red...
    geom_path(data = elk_regdata_list[[i]], aes(x = lon, y = lat), size = 0.7, color = "red") +
    # ...observed GPS fixes in black.
    geom_point(data = elk_id[[i]], aes(x = lon, y = lat), size = 1, color = "black") +
    theme_classic() +
    xlab("Longitude") + ylab("Latitude") +
    facet_wrap(~id, scales = "free")
}

# Print one plot per individual.
track_plots
## [[1]]

## 
## [[2]]

## 
## [[3]]

## 
## [[4]]

## 
## [[5]]

## 
## [[6]]

We can then combine all of the individual dataframes with new predicted locations back together into one large dataframe, using the do.call function.

# Stack the per-individual prediction data frames into one data frame.
elk_regdata <- do.call("rbind", elk_regdata_list)

str(elk_regdata)
## 'data.frame':    21132 obs. of  4 variables:
##  $ id  : chr  "GP2" "GP2" "GP2" "GP2" ...
##  $ date: POSIXct, format: "2003-03-15 01:00:00" "2003-03-15 03:00:00" ...
##  $ lon : num  -116 -116 -116 -116 -116 ...
##  $ lat : num  51.7 51.7 51.8 51.8 51.8 ...

Calculate Movement Metrics

Now that our data is regular in time, we can use the methods we learned in lab 2 to make our data spatial, convert the coordinates to a projected coordinate system, and save the X/Y coordinates as columns in our data.

library(sf)
# Project the regularized lon/lat data to UTM zone 11N (EPSG:32611), then
# keep the projected coordinates and the complex form Z = X + iY.
elk_reg_utm <- st_transform(
  st_as_sf(elk_regdata, coords = c("lon", "lat"), crs = 4326),
  32611
)
coords <- st_coordinates(elk_reg_utm)

elk_regdata$X <- coords[, 1]
elk_regdata$Y <- coords[, 2]
elk_regdata$Z <- complex(real = elk_regdata$X, imaginary = elk_regdata$Y)

We can then define a function to calculate various movement metrics, as in lab 2:

# Annotate one individual's regularized track with movement metrics:
# time step (days), step length (km), and relative turning angle (radians).
# Expects a data frame with a POSIXct `date` column and a complex `Z`
# column (UTM meters, Z = X + iY).
getMovementMetrics <- function(dataframe){

  # Displacement between consecutive locations (complex: dx + i*dy).
  move_step <- diff(dataframe$Z)

  # Fix: difftime()'s third positional argument is `tz`, not `units` —
  # the original difftime(..., "days") silently fell back to units = "auto",
  # so "time_step_days" actually held hours for this 2-hour data.
  time_step <- as.numeric(difftime(dataframe$date[-1],
                                   dataframe$date[-length(dataframe$date)],
                                   units = "days"))

  # Heading of each step; the relative angle is the change in heading.
  absolute_turnangle <- Arg(move_step)
  relative_turnangle <- diff(absolute_turnangle)

  # Step length in km (coordinates are in meters).
  step_length_km <- Mod(move_step)/1000

  # Pad with NA so each metric aligns with the location that ends the step.
  dataframe$time_step_days <- c(NA, time_step)
  dataframe$step_length_km <- c(NA, step_length_km)
  dataframe$relative_turnangle <- c(NA, NA, relative_turnangle)

  return(dataframe)
}
# Apply the metric calculation per individual so steps never span two
# animals, then recombine into a single data frame.
elk_reg_id <- split(elk_regdata, elk_regdata$id)
elk_reg_id2 <- lapply(elk_reg_id,
                            getMovementMetrics)

elk_reg <- do.call("rbind", elk_reg_id2)

str(elk_reg)
## 'data.frame':    21132 obs. of  10 variables:
##  $ id                : chr  "GP2" "GP2" "GP2" "GP2" ...
##  $ date              : POSIXct, format: "2003-03-15 01:00:00" "2003-03-15 03:00:00" ...
##  $ lon               : num  -116 -116 -116 -116 -116 ...
##  $ lat               : num  51.7 51.7 51.8 51.8 51.8 ...
##  $ X                 : num  597526 597971 597808 597823 597822 ...
##  $ Y                 : num  5733408 5733889 5734679 5734682 5734674 ...
##  $ Z                 : cplx  6e+05+5.73e+06i 6e+05+5.73e+06i 6e+05+5.73e+06i ...
##  $ time_step_days    : num  NA 2 2 2 2 2 2 2 2 2 ...
##  $ step_length_km    : num  NA 0.65518 0.80671 0.01603 0.00791 ...
##  $ relative_turnangle: num  NA NA 0.949 -1.615 -1.886 ...

Fit HMM

Now that our data is regularized and we have our movement metrics annotated to our data, we can fit our HMM to one of the individuals as an example and see if we can identify discrete behaviors.

We will use the “momentuHMM” package to fit our HMM, a package which fits HMM’s very quickly and with a variety of personalization options (including user-specified parameter distributions, options for hierarchical and multivariate models). You can find out more information with the momentuHMM vignette.

library(momentuHMM)

We will use the same individual as in the BCPA, “YL73”.

# Use the same example individual ("YL73") as in the BCPA section.
elk_example <- elk_reg |>
  dplyr::filter(id == "YL73")

We first prepare our data using the prepData function to grab our X/Y coordinates.

# momentuHMM pre-processing: computes step lengths and turning angles from
# the projected X/Y coordinates (type = "UTM" means planar, not lon/lat).
elk_hmm_prep <- prepData(elk_example, type = "UTM",
                                    coordNames = c("X","Y")) 

head(elk_hmm_prep)
##        ID       step      angle   id                date       lon      lat
## 1 Animal1 2324.54229         NA YL73 2004-02-20 00:00:00 -115.5194 51.75189
## 2 Animal1  777.96897 0.14237354 YL73 2004-02-20 02:00:00 -115.5522 51.74711
## 3 Animal1   11.27217 0.03027534 YL73 2004-02-20 04:00:00 -115.5627 51.74456
## 4 Animal1  237.37279 1.22829486 YL73 2004-02-20 06:00:00 -115.5628 51.74452
## 5 Animal1  492.74756 0.74708438 YL73 2004-02-20 08:00:00 -115.5626 51.74239
## 6 Animal1  116.88694 0.81967397 YL73 2004-02-20 10:00:00 -115.5575 51.73933
##                 Z time_step_days step_length_km relative_turnangle        x
## 1 602201+5734480i             NA             NA                 NA 602200.8
## 2 599949+5733904i              2     2.32454229                 NA 599949.0
## 3 599230+5733606i              2     0.77796897         0.14237354 599230.3
## 4 599220+5733601i              2     0.01127217         0.03027534 599220.1
## 5 599239+5733364i              2     0.23737279         1.22829486 599239.2
## 6 599602+5733031i              2     0.49274756         0.74708438 599602.2
##         y
## 1 5734480
## 2 5733904
## 3 5733606
## 4 5733601
## 5 5733364
## 6 5733031

We then make sure that all of our step lengths (“step” within the prepped object data) have values > 0.

# The gamma step-length distribution requires strictly positive values, so
# replace exact zero steps with 1 m. (momentuHMM's zero-mass/zero-inflation
# option would be the more formal alternative.)
elk_hmm_prep$step[elk_hmm_prep$step == 0] <- 1

MomentuHMM specifies HMM’s within a Bayesian framework, where prior distributions can be specified for each state-specific parameter to help distinguish the “true” behavioral state when combined with observed distributions.

We will start with a two-state model with state-specific and distribution specific parameters for each variable that will be used to distinguish our behavioral states, namely the step length and relative turning angles.

We will make our priors somewhat informative, assuming that one state will be defined by smaller step lengths (smaller mean and sd) and bigger turning angles (smaller concentration parameter), with the opposite for the second state.

# Starting parameter values for the two states: state 1 = short steps and
# weakly concentrated (tortuous) turning angles; state 2 = long steps and
# concentrated (straight) angles.
# NOTE(review): fitHMM's Par0 are optimizer starting values; the surrounding
# text calls them "priors" — confirm the intended framing.
stepMean0 <- c(m1 = 100, m2 = 4000)

stepSD0 <- c(sd1 = 50, sd2 = 1000)

angleCon0 <- c(rho1  = 0.1, rho2 = 0.8)

We will now assign names to our states, with the slower, more tortuous state being defined as a “resident” state and the faster, straighter state as a “transit” state.

# Human-readable labels for state 1 and state 2.
stateNames <- c("resident","transit")

We now pick distributions for our variables of interest (step length and turning angle), using a Gamma distribution for the step lengths (?dgamma) and a Wrapped Cauchy distribution for the turning angles (dwrpcauchy).

We store our priors defined in the code chunk above into a list object, “Par0”.

# Observation distributions: gamma for step length, wrapped Cauchy for angle.
dist <- list(step = "gamma", angle = "wrpcauchy")

# Starting values bundled per data stream, in the form fitHMM expects.
Par0 <- list(step=c(stepMean0, stepSD0), angle = c(angleCon0))

We can now fit our HMM using the fitHMM function with our prepped data and objects from above. We also specify the number of states to identify (“nbStates”).

# Fit the two-state HMM.
elk_hmm_fit <- fitHMM(data = elk_hmm_prep, nbStates = 2, dist = dist, 
                      Par0 = Par0, stateNames = stateNames)

# Estimated parameters, transition matrix, and initial state distribution.
print(elk_hmm_fit)
## Value of the maximum log-likelihood: -27262.61 
## 
## 
## step parameters:
## ----------------
##      resident  transit
## mean 215.5848 902.3420
## sd   219.7929 767.8621
## 
## angle parameters:
## -----------------
##                    resident   transit
## mean           0.000000e+00 0.0000000
## concentration 1.458213e-172 0.4219822
## 
## Regression coeffs for the transition probabilities:
## ---------------------------------------------------
##                1 -> 2     2 -> 1
## (Intercept) -1.338506 -0.5050826
## 
## Transition probability matrix:
## ------------------------------
##           resident   transit
## resident 0.7922442 0.2077558
## transit  0.3763470 0.6236530
## 
## Initial distribution:
## ---------------------
##     resident      transit 
## 6.537404e-06 9.999935e-01

After fitting our model, we can use the Viterbi algorithm with the viterbi function to decode the most likely sequence of states at each time point along our movement trajectory.

# Most likely state sequence (1 = resident, 2 = transit), decoded with the
# Viterbi algorithm.
hmm_states <- viterbi(elk_hmm_fit)

str(hmm_states)
##  int [1:3076] 2 2 1 1 1 1 1 2 2 1 ...

We can add our predicted states as a column for each observation to our data:

# Attach the decoded state to each regularized location.
elk_example$state <- hmm_states

We can then plot the results, using Base R to make multidimensional plots of the trajectory with the annotated states:

# Three linked panels: the x-y track plus each coordinate through time, with
# points and segments colored by decoded state (orange = 1, blue = 2).
layout(cbind(c(1,1),2:3))
par(bty = "l", mar = c(2,2,2,2))

with(elk_example, {
  state_cols <- c("orange", "blue")
  n <- length(X)

  # Panel 1: track in projected coordinates.
  plot(X, Y, asp = 1, col = state_cols[state], pch = 19, cex = 0.7)
  segments(X[-n], Y[-n], X[-1], Y[-1], col = state_cols[state[-n]])

  # Panel 2: X over time. Fix: the original drew these segments with Y
  # coordinates, so they did not connect the plotted X points.
  plot(date, X, col = state_cols[state], pch = 19, cex = 0.7)
  segments(date[-n], X[-n], date[-1], X[-1], col = state_cols[state[-n]])

  # Panel 3: Y over time.
  plot(date, Y, col = state_cols[state], pch = 19, cex = 0.7)
  segments(date[-n], Y[-n], date[-1], Y[-1], col = state_cols[state[-n]])
})

We can also convert our data to an sf structure and use the methods we learned in lab 1 to plot the trajectory with each location colored by the predicted state with the mapview package.

library(mapview)
# Points as sf in WGS84, with state coerced to character so mapview treats
# it as a categorical variable.
elk_example_sf <- elk_example |>
  st_as_sf(coords=c("X","Y"), crs= 32611) |>
  st_transform(4326) |>
  mutate(state = as.character(state))

# Collapse the points (kept in row order via do_union = FALSE) into one
# LINESTRING track.
elk_example_track <- elk_example_sf |>
  summarize(do_union=FALSE) |> 
  st_cast("LINESTRING")

# Interactive map: grey track with state-colored points on top.
mapview(elk_example_track, color="darkgrey") +
  mapview(elk_example_sf, zcol="state", col.regions=c("orange","blue"))

We can see from the plot that our movement “blobs” still have a mixture of both colors. We can try fitting a multivariate HMM, with latitude as a covariate, to assist with further separating our two states along our movement track.

To fit a multivariate HMM, we define our formula with desired covariate(s), here “y” for latitude.

We then re-fit the model using the fitHMM function, adding formula in as an argument.

# Model transition probabilities as a function of the y coordinate
# (prepData names the projected northing `y`).
formula <- ~y

elk_hmm_fit2 <- fitHMM(data = elk_hmm_prep, nbStates = 2, dist = dist, 
                      Par0 = Par0, stateNames = stateNames, formula = formula)

print(elk_hmm_fit2)
## Value of the maximum log-likelihood: -38803.7 
## 
## 
## step parameters:
## ----------------
##      resident transit
## mean      100    4000
## sd         50    1000
## 
## angle parameters:
## -----------------
##               resident transit
## mean               0.0     0.0
## concentration      0.1     0.8
## 
## Regression coeffs for the transition probabilities:
## ---------------------------------------------------
##             1 -> 2 2 -> 1
## (Intercept)   -1.5   -1.5
## y              0.0    0.0
## 
## Transition probability matrix (based on mean covariate values):
## ---------------------------------------------------------------
##           resident   transit
## resident 0.8175745 0.1824255
## transit  0.1824255 0.8175745
## 
## Initial distribution:
## ---------------------
## resident  transit 
##      0.5      0.5

We can then decode the states and bind them to our dataframe, as before:

# Decode states from the covariate model and attach them to the data.
hmm_states <- viterbi(elk_hmm_fit2)

elk_example$state <- hmm_states

We can compare the results using a plot again:

# Three linked panels for the covariate model's decoded states
# (orange = 1, blue = 2).
layout(cbind(c(1,1),2:3))
par(bty = "l", mar = c(2,2,2,2))

with(elk_example, {
  state_cols <- c("orange", "blue")
  n <- length(X)

  # Panel 1: track in projected coordinates.
  plot(X, Y, asp = 1, col = state_cols[state], pch = 19, cex = 0.7)
  segments(X[-n], Y[-n], X[-1], Y[-1], col = state_cols[state[-n]])

  # Panel 2: X over time. Fix: the original drew these segments with Y
  # coordinates, so they did not connect the plotted X points.
  plot(date, X, col = state_cols[state], pch = 19, cex = 0.7)
  segments(date[-n], X[-n], date[-1], X[-1], col = state_cols[state[-n]])

  # Panel 3: Y over time.
  plot(date, Y, col = state_cols[state], pch = 19, cex = 0.7)
  segments(date[-n], Y[-n], date[-1], Y[-1], col = state_cols[state[-n]])
})

There is still quite a bit of blue in our orange “blobs”!

This may suggest that there are actually 3 behavioral states here: one that is fast and straight (blue state), one that is slower and tortuous (orange state), and a third that is intermediate, where the animal is having directed, faster movements within its residential patch.

We can fit a 3 state model by simply adding an additional state-specific prior for each movement variable.

# Starting values for three states: slow/tortuous (1), fast/straight (2),
# and an intermediate directed-but-resident state (3).
stepMean0 <- c(m1 = 100, m2 = 4000, m3 = 1000)
stepSD0 <- c(sd1 = 50, sd2 = 1000, sd3 = 500)
angleCon0 <- c(rho1  = 0.1, rho2 = 0.8, rho3 = 0.5)

We will label this third state as an additional “faster” residential state.

# Labels, observation distributions, and bundled starting values for the
# 3-state model.
stateNames <- c("resident-slow","transit", "resident-faster")
dist <- list(step = "gamma", angle = "wrpcauchy")
Par0 <- list(step=c(stepMean0, stepSD0), angle = c(angleCon0))

We re-fit the model as before, but this time specifying 3 states.

# Same covariate formula as before, now with nbStates = 3.
formula <- ~y

elk_hmm_fit3 <- fitHMM(data = elk_hmm_prep, nbStates = 3, dist = dist, 
                      Par0 = Par0, stateNames = stateNames, formula = formula)

We can decode our model results and bind the predicted states for each observation back to our data:

# Decode the 3-state model and attach the states to the data.
hmm_states <- viterbi(elk_hmm_fit3)

elk_example$state <- hmm_states

Now if we plot the results with our new state in green, we can see that our model did a much better job at finding our transiting state and two residential states within our movement “blobs”!

# Three linked panels with the 3-state decoding
# (orange = 1, blue = 2, green = 3).
layout(cbind(c(1,1),2:3))
par(bty = "l", mar = c(2,2,2,2))

with(elk_example, {
  state_cols <- c("orange", "blue", "green")
  n <- length(X)

  # Panel 1: track in projected coordinates.
  plot(X, Y, asp = 1, col = state_cols[state], pch = 19, cex = 0.7)
  segments(X[-n], Y[-n], X[-1], Y[-1], col = state_cols[state[-n]])

  # Panel 2: X over time. Fix: the original drew these segments with Y
  # coordinates, so they did not connect the plotted X points.
  plot(date, X, col = state_cols[state], pch = 19, cex = 0.7)
  segments(date[-n], X[-n], date[-1], X[-1], col = state_cols[state[-n]])

  # Panel 3: Y over time.
  plot(date, Y, col = state_cols[state], pch = 19, cex = 0.7)
  segments(date[-n], Y[-n], date[-1], Y[-1], col = state_cols[state[-n]])
})

# Map the 3-state results: sf points in WGS84 with categorical state labels.
elk_example_sf <- elk_example |>
  st_as_sf(coords=c("X","Y"), crs= 32611) |>
  st_transform(4326) |>
  mutate(state = as.character(state))

# Collapse the points (in row order) into a single LINESTRING track.
elk_example_track <- elk_example_sf |>
  summarize(do_union=FALSE) |> 
  st_cast("LINESTRING")

# Grey track with points colored by the three decoded states.
mapview(elk_example_track, color="darkgrey") +
  mapview(elk_example_sf, zcol="state", col.regions=c("orange","blue", "green"))